import pandas as pd
import altair as alt
import geopandas as geopandas
import contextily as cx
import folium as folium
from pysal.viz import mapclassify
# Let pandas render up to 500 columns so wide frames are not elided.
pd.options.display.max_columns = 500
# Turn off Altair's row cap so charts can be built from the full data set.
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')
# Load the raw Shanghai listings export
# (the file name suggests a 2021-09-28 snapshot — TODO confirm).
data_raw = pd.read_csv('listings_shanghai_09282021.csv')
# Bare expression: displays the frame when run as a notebook cell.
data_raw
| id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24963 | Heart of French Built Music Conservatory / Home | 98203 | Jia | NaN | 徐汇区 / Xuhui District | 31.210730 | 121.451590 | Entire home/apt | 480 | 3 | 85 | 2019-11-22 | 0.78 | 1 | 240 | 0 | NaN |
| 1 | 322045 | 【sidihome】『Impression·Flower』ART studio downtown | 681552 | Leon | NaN | 静安区 / Jing'an District | 31.242400 | 121.444490 | Entire home/apt | 464 | 1 | 42 | 2017-11-13 | 0.51 | 16 | 242 | 0 | NaN |
| 2 | 402315 | Sidihome·Moon Treasures SIMPLE APT IN DOWNTOWN | 681552 | Leon | NaN | 静安区 / Jing'an District | 31.242870 | 121.443550 | Entire home/apt | 445 | 1 | 27 | 2012-07-07 | 0.24 | 16 | 333 | 7 | NaN |
| 3 | 479499 | 【sidihome】Plum flower Luxury movie apt in JING'AN | 681552 | Leon | NaN | 静安区 / Jing'an District | 31.243660 | 121.443960 | Entire home/apt | 464 | 1 | 28 | 2013-09-25 | 0.25 | 16 | 360 | 0 | NaN |
| 4 | 479506 | 【Sidihome】ENGLAND type cozy studio in downtown | 681552 | Leon | NaN | 静安区 / Jing'an District | 31.242110 | 121.442590 | Entire home/apt | 407 | 1 | 34 | 2016-12-29 | 0.33 | 16 | 41 | 0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27800 | 52504269 | 海上避风港湾 | 422241715 | 越努力,越幸运 | NaN | 松江区 / Songjiang District | 31.141783 | 121.327689 | Entire home/apt | 120 | 1 | 0 | NaN | NaN | 27 | 359 | 0 | NaN |
| 27801 | 52505271 | 【昕舍·城】皇冠双人房/上海火车站/人民广场/南京东路/外滩/东方明珠/静安寺/城隍庙/迪士... | 161080396 | 昕舍 | NaN | 静安区 / Jing'an District | 31.238201 | 121.446381 | Private room | 555 | 1 | 0 | NaN | NaN | 61 | 358 | 0 | NaN |
| 27802 | 52505580 | 【昕舍·城】皇冠双人房/上海火车站/人民广场/外滩/南京东路/东方明珠/城隍庙/静安寺/地铁... | 161080396 | 昕舍 | NaN | 静安区 / Jing'an District | 31.238201 | 121.446381 | Entire home/apt | 555 | 1 | 0 | NaN | NaN | 61 | 358 | 0 | NaN |
| 27803 | 52506106 | 【昕舍·宿】皇冠双人房/上海火车站/人民广场/南京东路/外滩/东方明珠/城隍庙/静安寺/地铁... | 161080396 | 昕舍 | NaN | 静安区 / Jing'an District | 31.263439 | 121.460289 | Private room | 555 | 1 | 0 | NaN | NaN | 61 | 358 | 0 | NaN |
| 27804 | 52506281 | 虹桥区工作十分钟,会展旁,地铁口徐泾东。 | 315260924 | 毅 | NaN | 青浦区 / Qingpu District | 31.186459 | 121.289703 | Entire home/apt | 199 | 90 | 0 | NaN | NaN | 3 | 359 | 0 | NaN |
27805 rows × 18 columns
# Show any rows where `neighbourhood_group` is actually populated; an empty
# result means the column holds nothing but missing values. `notna()` tests
# for real missing values instead of comparing against the text "NaN".
data_raw[data_raw['neighbourhood_group'].notna()]
| id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license |
|---|
# Show any rows where `license` is actually populated; an empty result means
# the column holds nothing but missing values. `notna()` tests for real
# missing values instead of comparing against the text "NaN".
data_raw[data_raw['license'].notna()]
| id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | license |
|---|
# Keep listings with some review activity, a price below 30000 and a minimum
# stay shorter than 30 nights. Inside DataFrame.query, `&` has the precedence
# of `and`, so the three comparisons are combined as written.
data = data_raw.query('reviews_per_month > 0 & price < 30000 & minimum_nights < 30')
# Drop the two columns that carry no values. `columns=` is used because
# passing the axis positionally (`drop(labels, 1)`) is no longer accepted by
# pandas 2.x.
data = data.drop(columns=['neighbourhood_group', 'license'])
data
| id | name | host_id | host_name | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24963 | Heart of French Built Music Conservatory / Home | 98203 | Jia | 徐汇区 / Xuhui District | 31.210730 | 121.451590 | Entire home/apt | 480 | 3 | 85 | 2019-11-22 | 0.78 | 1 | 240 | 0 |
| 1 | 322045 | 【sidihome】『Impression·Flower』ART studio downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242400 | 121.444490 | Entire home/apt | 464 | 1 | 42 | 2017-11-13 | 0.51 | 16 | 242 | 0 |
| 2 | 402315 | Sidihome·Moon Treasures SIMPLE APT IN DOWNTOWN | 681552 | Leon | 静安区 / Jing'an District | 31.242870 | 121.443550 | Entire home/apt | 445 | 1 | 27 | 2012-07-07 | 0.24 | 16 | 333 | 7 |
| 3 | 479499 | 【sidihome】Plum flower Luxury movie apt in JING'AN | 681552 | Leon | 静安区 / Jing'an District | 31.243660 | 121.443960 | Entire home/apt | 464 | 1 | 28 | 2013-09-25 | 0.25 | 16 | 360 | 0 |
| 4 | 479506 | 【Sidihome】ENGLAND type cozy studio in downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242110 | 121.442590 | Entire home/apt | 407 | 1 | 34 | 2016-12-29 | 0.33 | 16 | 41 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27592 | 52417690 | 【乌托邦】白日梦 15号线祁安路地铁站、临近上海大学、静安大融城 | 163526627 | 戴月晔 | 普陀区 / Putuo District | 31.295555 | 121.388359 | Private room | 209 | 1 | 1 | 2021-09-26 | 1.00 | 3 | 363 | 1 |
| 27617 | 52431527 | 【榭舍】100寸巨幕投影仪|近中山公园、东华大学|延安西路2/4号线地铁站步行300M|可做饭 | 188252534 | King | 长宁区 / Changning District | 31.213061 | 121.417505 | Entire home/apt | 1028 | 1 | 3 | 2021-09-26 | 3.00 | 7 | 182 | 3 |
| 27668 | 52451273 | 【外滩轻奢·PLUS】全江景落地窗观景/外滩全景/听外滩钟声/俯瞰百年外滩「不接吵闹型活动聚会」 | 269261326 | 江先森 | 黄浦区 / Huangpu District | 31.220263 | 121.503426 | Entire home/apt | 2349 | 1 | 3 | 2021-09-28 | 3.00 | 4 | 363 | 3 |
| 27682 | 52461214 | 租界风情/徐家汇公园景观/胸科医院/宛平路梧桐大道/遍布咖啡馆和西餐厅 | 424640055 | 律 | 徐汇区 / Xuhui District | 31.198627 | 121.445412 | Entire home/apt | 489 | 1 | 1 | 2021-09-27 | 1.00 | 1 | 50 | 1 |
| 27706 | 52465589 | 中山公园龙之梦轻奢大三房,全新装修,地铁2/4号线,到中山公园,新天地、人民广场、静安寺、南京东路 | 68952375 | 剑峰 | 长宁区 / Changning District | 31.215880 | 121.419420 | Entire home/apt | 1485 | 2 | 1 | 2021-09-29 | 1.00 | 1 | 259 | 1 |
16910 rows × 16 columns
def cal_occupancy_rate(minimum_nights, reviews_per_month,
                       review_rate=0.5, max_occupancy=0.7):
    """Estimate a capped occupancy figure for each listing.

    The estimate is ``reviews_per_month / review_rate * minimum_nights``,
    limited to at most ``max_occupancy``. Missing results are replaced by 0.

    NOTE(review): the uncapped product looks like "nights per month", yet it
    is capped by ``max_occupancy`` as if it were a fraction of the month.
    Confirm whether a division by the number of days in a month is intended
    before relying on the absolute values.

    Parameters
    ----------
    minimum_nights : pandas.Series
        Minimum stay of each listing, used as the multiplier per review.
    reviews_per_month : pandas.Series
        Reviews per month of each listing (may contain NaN).
    review_rate : float, default 0.5
        Divisor applied to ``reviews_per_month`` (presumably the share of
        stays that leave a review — confirm).
    max_occupancy : float, default 0.7
        Upper bound applied to the estimate.

    Returns
    -------
    pandas.Series
        The capped estimate with NaN replaced by 0. The inputs are not
        modified.
    """
    occupancy_per_month = reviews_per_month / review_rate * minimum_nights
    # clip() leaves NaN untouched, so the cap and the NaN fill stay independent.
    return occupancy_per_month.clip(upper=max_occupancy).fillna(0)
# Per-listing occupancy estimate derived from minimum stay and review frequency.
data['occupancy_rate'] = cal_occupancy_rate(
    minimum_nights=data['minimum_nights'],
    reviews_per_month=data['reviews_per_month'],
)
# Income estimate: nightly price x occupancy estimate x 30.
data['monthly_rental_income'] = data['price'].mul(data['occupancy_rate']).mul(30)
# Final listing table used by the rest of the analysis.
data
| id | name | host_id | host_name | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | occupancy_rate | monthly_rental_income | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24963 | Heart of French Built Music Conservatory / Home | 98203 | Jia | 徐汇区 / Xuhui District | 31.210730 | 121.451590 | Entire home/apt | 480 | 3 | 85 | 2019-11-22 | 0.78 | 1 | 240 | 0 | 0.70 | 10080.0 |
| 1 | 322045 | 【sidihome】『Impression·Flower』ART studio downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242400 | 121.444490 | Entire home/apt | 464 | 1 | 42 | 2017-11-13 | 0.51 | 16 | 242 | 0 | 0.70 | 9744.0 |
| 2 | 402315 | Sidihome·Moon Treasures SIMPLE APT IN DOWNTOWN | 681552 | Leon | 静安区 / Jing'an District | 31.242870 | 121.443550 | Entire home/apt | 445 | 1 | 27 | 2012-07-07 | 0.24 | 16 | 333 | 7 | 0.48 | 6408.0 |
| 3 | 479499 | 【sidihome】Plum flower Luxury movie apt in JING'AN | 681552 | Leon | 静安区 / Jing'an District | 31.243660 | 121.443960 | Entire home/apt | 464 | 1 | 28 | 2013-09-25 | 0.25 | 16 | 360 | 0 | 0.50 | 6960.0 |
| 4 | 479506 | 【Sidihome】ENGLAND type cozy studio in downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242110 | 121.442590 | Entire home/apt | 407 | 1 | 34 | 2016-12-29 | 0.33 | 16 | 41 | 0 | 0.66 | 8058.6 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27592 | 52417690 | 【乌托邦】白日梦 15号线祁安路地铁站、临近上海大学、静安大融城 | 163526627 | 戴月晔 | 普陀区 / Putuo District | 31.295555 | 121.388359 | Private room | 209 | 1 | 1 | 2021-09-26 | 1.00 | 3 | 363 | 1 | 0.70 | 4389.0 |
| 27617 | 52431527 | 【榭舍】100寸巨幕投影仪|近中山公园、东华大学|延安西路2/4号线地铁站步行300M|可做饭 | 188252534 | King | 长宁区 / Changning District | 31.213061 | 121.417505 | Entire home/apt | 1028 | 1 | 3 | 2021-09-26 | 3.00 | 7 | 182 | 3 | 0.70 | 21588.0 |
| 27668 | 52451273 | 【外滩轻奢·PLUS】全江景落地窗观景/外滩全景/听外滩钟声/俯瞰百年外滩「不接吵闹型活动聚会」 | 269261326 | 江先森 | 黄浦区 / Huangpu District | 31.220263 | 121.503426 | Entire home/apt | 2349 | 1 | 3 | 2021-09-28 | 3.00 | 4 | 363 | 3 | 0.70 | 49329.0 |
| 27682 | 52461214 | 租界风情/徐家汇公园景观/胸科医院/宛平路梧桐大道/遍布咖啡馆和西餐厅 | 424640055 | 律 | 徐汇区 / Xuhui District | 31.198627 | 121.445412 | Entire home/apt | 489 | 1 | 1 | 2021-09-27 | 1.00 | 1 | 50 | 1 | 0.70 | 10269.0 |
| 27706 | 52465589 | 中山公园龙之梦轻奢大三房,全新装修,地铁2/4号线,到中山公园,新天地、人民广场、静安寺、南京东路 | 68952375 | 剑峰 | 长宁区 / Changning District | 31.215880 | 121.419420 | Entire home/apt | 1485 | 2 | 1 | 2021-09-29 | 1.00 | 1 | 259 | 1 | 0.70 | 31185.0 |
16910 rows × 18 columns
# Read the district boundaries (one row per neighbourhood, with its geometry)
# from the GeoJSON file.
districts = geopandas.read_file('neighbourhoods.geojson')
# Bare expression: displays the frame when run as a notebook cell.
districts
| neighbourhood | neighbourhood_group | geometry | |
|---|---|---|---|
| 0 | 青浦区 / Qingpu District | None | MULTIPOLYGON (((120.99306 30.95248, 120.99151 ... |
| 1 | 黄浦区 / Huangpu District | None | MULTIPOLYGON (((121.49028 31.24411, 121.48963 ... |
| 2 | 浦东新区 / Pudong | None | MULTIPOLYGON (((121.96165 31.20804, 121.96452 ... |
| 3 | 杨浦区 / Yangpu District | None | MULTIPOLYGON (((121.51592 31.34568, 121.51700 ... |
| 4 | 虹口区 / Hongkou District | None | MULTIPOLYGON (((121.47506 31.25179, 121.47512 ... |
| 5 | 静安区 / Jing'an District | None | MULTIPOLYGON (((121.47506 31.25179, 121.47626 ... |
| 6 | 宝山区 / Baoshan District | None | MULTIPOLYGON (((121.51847 31.34456, 121.51700 ... |
| 7 | 普陀区 / Putuo District | None | MULTIPOLYGON (((121.44540 31.24512, 121.44518 ... |
| 8 | 长宁区 / Changning District | None | MULTIPOLYGON (((121.34234 31.24280, 121.34275 ... |
| 9 | 徐汇区 / Xuhui District | None | MULTIPOLYGON (((121.47122 31.19020, 121.46955 ... |
| 10 | 闵行区 / Minhang District | None | MULTIPOLYGON (((121.55801 31.07697, 121.55818 ... |
| 11 | 嘉定区 / Jiading District | None | MULTIPOLYGON (((121.29654 31.49868, 121.29739 ... |
| 12 | 崇明区 / Chongming District | None | MULTIPOLYGON (((121.78813 31.32750, 121.78835 ... |
| 13 | 奉贤区 / Fengxian District | None | MULTIPOLYGON (((121.35302 30.90651, 121.35294 ... |
| 14 | 金山区 / Jinshan District | None | MULTIPOLYGON (((121.42155 30.68463, 121.42217 ... |
| 15 | 松江区 / Songjiang District | None | MULTIPOLYGON (((121.34752 30.91507, 121.34740 ... |
# Inspect the coordinate reference system attached to the district geometries.
districts.crs
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
# No official (or free unofficial) map of Shanghai is available as a shapefile,
# so the official area and population figures are kept in an Excel file instead.
sh_basic_info = pd.read_excel('shanghai basic info.xlsx')
# Bare expression: displays the frame when run as a notebook cell.
sh_basic_info
| neighbourhood | area | population | pop_density | |
|---|---|---|---|---|
| 0 | 全 市 | 6340.50 | 24237800.0 | 3822.695371 |
| 1 | 浦东新区 / Pudong | 1210.41 | 5550200.0 | 4585.388422 |
| 2 | 黄浦区 / Huangpu District | 20.46 | 653800.0 | 31955.034213 |
| 3 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 |
| 4 | 长宁区 / Changning District | 38.30 | 694000.0 | 18120.104439 |
| 5 | 静安区 / Jing'an District | 36.88 | 1062800.0 | 28817.787419 |
| 6 | 普陀区 / Putuo District | 54.83 | 1281900.0 | 23379.536750 |
| 7 | 虹口区 / Hongkou District | 23.48 | 797000.0 | 33943.781942 |
| 8 | 杨浦区 / Yangpu District | 60.73 | 1312700.0 | 21615.346616 |
| 9 | 闵行区 / Minhang District | 370.75 | 2543500.0 | 6860.418071 |
| 10 | 宝山区 / Baoshan District | 270.99 | 2042300.0 | 7536.440459 |
| 11 | 嘉定区 / Jiading District | 464.20 | 1588900.0 | 3422.878070 |
| 12 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 |
| 13 | 松江区 / Songjiang District | 605.64 | 1762200.0 | 2909.649297 |
| 14 | 青浦区 / Qingpu District | 670.14 | 1219000.0 | 1819.022891 |
| 15 | 奉贤区 / Fengxian District | 687.39 | 1152000.0 | 1675.904508 |
| 16 | 崇明区 / Chongming District | 1185.49 | 688100.0 | 580.435094 |
# Attach area / population / density to each district polygon. The default
# inner join keeps only names present in both frames, so the city-wide total
# row in `sh_basic_info` (which has no polygon) is left out.
districts = districts.merge(sh_basic_info, on='neighbourhood')
# `neighbourhood_group` carries no information here. `columns=` is used because
# passing the axis positionally (`drop(label, 1)`) is no longer accepted by
# pandas 2.x.
districts = districts.drop(columns='neighbourhood_group')
districts
| neighbourhood | geometry | area | population | pop_density | |
|---|---|---|---|---|---|
| 0 | 青浦区 / Qingpu District | MULTIPOLYGON (((120.99306 30.95248, 120.99151 ... | 670.14 | 1219000.0 | 1819.022891 |
| 1 | 黄浦区 / Huangpu District | MULTIPOLYGON (((121.49028 31.24411, 121.48963 ... | 20.46 | 653800.0 | 31955.034213 |
| 2 | 浦东新区 / Pudong | MULTIPOLYGON (((121.96165 31.20804, 121.96452 ... | 1210.41 | 5550200.0 | 4585.388422 |
| 3 | 杨浦区 / Yangpu District | MULTIPOLYGON (((121.51592 31.34568, 121.51700 ... | 60.73 | 1312700.0 | 21615.346616 |
| 4 | 虹口区 / Hongkou District | MULTIPOLYGON (((121.47506 31.25179, 121.47512 ... | 23.48 | 797000.0 | 33943.781942 |
| 5 | 静安区 / Jing'an District | MULTIPOLYGON (((121.47506 31.25179, 121.47626 ... | 36.88 | 1062800.0 | 28817.787419 |
| 6 | 宝山区 / Baoshan District | MULTIPOLYGON (((121.51847 31.34456, 121.51700 ... | 270.99 | 2042300.0 | 7536.440459 |
| 7 | 普陀区 / Putuo District | MULTIPOLYGON (((121.44540 31.24512, 121.44518 ... | 54.83 | 1281900.0 | 23379.536750 |
| 8 | 长宁区 / Changning District | MULTIPOLYGON (((121.34234 31.24280, 121.34275 ... | 38.30 | 694000.0 | 18120.104439 |
| 9 | 徐汇区 / Xuhui District | MULTIPOLYGON (((121.47122 31.19020, 121.46955 ... | 54.76 | 1084400.0 | 19802.775749 |
| 10 | 闵行区 / Minhang District | MULTIPOLYGON (((121.55801 31.07697, 121.55818 ... | 370.75 | 2543500.0 | 6860.418071 |
| 11 | 嘉定区 / Jiading District | MULTIPOLYGON (((121.29654 31.49868, 121.29739 ... | 464.20 | 1588900.0 | 3422.878070 |
| 12 | 崇明区 / Chongming District | MULTIPOLYGON (((121.78813 31.32750, 121.78835 ... | 1185.49 | 688100.0 | 580.435094 |
| 13 | 奉贤区 / Fengxian District | MULTIPOLYGON (((121.35302 30.90651, 121.35294 ... | 687.39 | 1152000.0 | 1675.904508 |
| 14 | 金山区 / Jinshan District | MULTIPOLYGON (((121.42155 30.68463, 121.42217 ... | 586.05 | 805000.0 | 1373.602935 |
| 15 | 松江区 / Songjiang District | MULTIPOLYGON (((121.34752 30.91507, 121.34740 ... | 605.64 | 1762200.0 | 2909.649297 |
This is just to give you a general idea of what Shanghai is like. It is large: the city covers more than one fifth of the area of Belgium. The outlying districts are of course populated too, and with economic growth and urbanization more and more people come to work in Shanghai. Many of them may not have enough money to buy, or even rent, an apartment in the more central districts, so the outlying districts are likely to see more population growth. For now, though, most people still live in the orange and red areas. Don't be misled by how compact they look on the map: commuting from, say, Putuo District (dark red) to Pudong (orange, the largest district) still takes more than an hour, even with our very efficient metro system.
# Classify population density into five Fisher-Jenks classes; the series
# minimum is placed in front of the class bounds so the bin edges start at the
# lowest observed value.
classification = mapclassify.FisherJenks(districts['pop_density'], k=5)
bin_list = [districts['pop_density'].min()] + classification.bins.tolist()

# Base map with a restricted zoom range.
map_pop_density = folium.Map(
    location=[31.22, 121.46],
    tiles='cartodb positron',
    zoom_start=9,
    min_zoom=9,
    max_zoom=13,
    width=700,
    height=500,
)

# Colour each district polygon by its population density.
folium.Choropleth(
    geo_data=districts,
    data=districts,
    columns=['neighbourhood', 'pop_density'],
    key_on='feature.properties.neighbourhood',
    bins=bin_list,
    fill_color='YlOrRd',
    fill_opacity=0.8,
    highlight=True,
    legend_name='Population Density of Each District in Shanghai',
).add_to(map_pop_density)

map_pop_density
# Number of listings in each district (count of `id` values per neighbourhood).
district1 = (
    data.groupby('neighbourhood')['id']
    .count()
    .reset_index(name='listings_per_district')
)
district1
| neighbourhood | listings_per_district | |
|---|---|---|
| 0 | 嘉定区 / Jiading District | 388 |
| 1 | 奉贤区 / Fengxian District | 150 |
| 2 | 宝山区 / Baoshan District | 315 |
| 3 | 崇明区 / Chongming District | 843 |
| 4 | 徐汇区 / Xuhui District | 1533 |
| 5 | 普陀区 / Putuo District | 244 |
| 6 | 杨浦区 / Yangpu District | 403 |
| 7 | 松江区 / Songjiang District | 618 |
| 8 | 浦东新区 / Pudong | 6563 |
| 9 | 虹口区 / Hongkou District | 471 |
| 10 | 金山区 / Jinshan District | 95 |
| 11 | 长宁区 / Changning District | 518 |
| 12 | 闵行区 / Minhang District | 707 |
| 13 | 青浦区 / Qingpu District | 638 |
| 14 | 静安区 / Jing'an District | 1166 |
| 15 | 黄浦区 / Huangpu District | 2258 |
# Bring the per-district listing counts onto the district frame, then express
# them per unit of district area.
districts = districts.merge(district1, on='neighbourhood')
districts['listings_per_sqkm'] = districts['listings_per_district'].div(districts['area'])
districts
| neighbourhood | geometry | area | population | pop_density | listings_per_district | listings_per_sqkm | |
|---|---|---|---|---|---|---|---|
| 0 | 青浦区 / Qingpu District | MULTIPOLYGON (((120.99306 30.95248, 120.99151 ... | 670.14 | 1219000.0 | 1819.022891 | 638 | 0.952040 |
| 1 | 黄浦区 / Huangpu District | MULTIPOLYGON (((121.49028 31.24411, 121.48963 ... | 20.46 | 653800.0 | 31955.034213 | 2258 | 110.361681 |
| 2 | 浦东新区 / Pudong | MULTIPOLYGON (((121.96165 31.20804, 121.96452 ... | 1210.41 | 5550200.0 | 4585.388422 | 6563 | 5.422130 |
| 3 | 杨浦区 / Yangpu District | MULTIPOLYGON (((121.51592 31.34568, 121.51700 ... | 60.73 | 1312700.0 | 21615.346616 | 403 | 6.635930 |
| 4 | 虹口区 / Hongkou District | MULTIPOLYGON (((121.47506 31.25179, 121.47512 ... | 23.48 | 797000.0 | 33943.781942 | 471 | 20.059625 |
| 5 | 静安区 / Jing'an District | MULTIPOLYGON (((121.47506 31.25179, 121.47626 ... | 36.88 | 1062800.0 | 28817.787419 | 1166 | 31.616052 |
| 6 | 宝山区 / Baoshan District | MULTIPOLYGON (((121.51847 31.34456, 121.51700 ... | 270.99 | 2042300.0 | 7536.440459 | 315 | 1.162405 |
| 7 | 普陀区 / Putuo District | MULTIPOLYGON (((121.44540 31.24512, 121.44518 ... | 54.83 | 1281900.0 | 23379.536750 | 244 | 4.450119 |
| 8 | 长宁区 / Changning District | MULTIPOLYGON (((121.34234 31.24280, 121.34275 ... | 38.30 | 694000.0 | 18120.104439 | 518 | 13.524804 |
| 9 | 徐汇区 / Xuhui District | MULTIPOLYGON (((121.47122 31.19020, 121.46955 ... | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| 10 | 闵行区 / Minhang District | MULTIPOLYGON (((121.55801 31.07697, 121.55818 ... | 370.75 | 2543500.0 | 6860.418071 | 707 | 1.906945 |
| 11 | 嘉定区 / Jiading District | MULTIPOLYGON (((121.29654 31.49868, 121.29739 ... | 464.20 | 1588900.0 | 3422.878070 | 388 | 0.835847 |
| 12 | 崇明区 / Chongming District | MULTIPOLYGON (((121.78813 31.32750, 121.78835 ... | 1185.49 | 688100.0 | 580.435094 | 843 | 0.711098 |
| 13 | 奉贤区 / Fengxian District | MULTIPOLYGON (((121.35302 30.90651, 121.35294 ... | 687.39 | 1152000.0 | 1675.904508 | 150 | 0.218217 |
| 14 | 金山区 / Jinshan District | MULTIPOLYGON (((121.42155 30.68463, 121.42217 ... | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
| 15 | 松江区 / Songjiang District | MULTIPOLYGON (((121.34752 30.91507, 121.34740 ... | 605.64 | 1762200.0 | 2909.649297 | 618 | 1.020408 |
# Quick static plot of the districts, using `listings_per_sqkm` as the plotted column.
districts.plot(column = 'listings_per_sqkm')
<AxesSubplot:>
So, although "浦东新区 / Pudong" has the highest number of listings (it is a really big district, the biggest by area; see "district1"), the listings are still most dense in the central districts.
As you'll see in the map below, the distribution is quite similar to that of population density: the listings are more concentrated in the central districts. (Because that's where the fun stuff is. ;) )
# Classify listing density into five Fisher-Jenks classes; the series minimum
# is placed in front of the class bounds so the bin edges start at the lowest
# observed value.
classification = mapclassify.FisherJenks(districts['listings_per_sqkm'], k=5)
bin_list = [districts['listings_per_sqkm'].min()] + classification.bins.tolist()

# Base map with a restricted zoom range.
map_listings = folium.Map(
    location=[31.22, 121.46],
    tiles='cartodb positron',
    zoom_start=9,
    min_zoom=9,
    max_zoom=13,
    width=700,
    height=500,
)

# Colour each district polygon by its number of listings per unit area.
folium.Choropleth(
    geo_data=districts,
    data=districts,
    columns=['neighbourhood', 'listings_per_sqkm'],
    key_on='feature.properties.neighbourhood',
    bins=bin_list,
    fill_color='YlOrRd',
    fill_opacity=0.8,
    legend_name='Number of Listings per km2',
).add_to(map_listings)

map_listings
# Turn each listing into a point built from its longitude/latitude columns.
# The points are tagged with EPSG:4326, the same CRS that `districts` reports,
# so no reprojection is needed before combining the two data sets.
data_geo = geopandas.GeoDataFrame(
    data,
    geometry=geopandas.points_from_xy(x=data['longitude'], y=data['latitude']),
    crs='EPSG:4326',
)
data_geo
| id | name | host_id | host_name | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | occupancy_rate | monthly_rental_income | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24963 | Heart of French Built Music Conservatory / Home | 98203 | Jia | 徐汇区 / Xuhui District | 31.210730 | 121.451590 | Entire home/apt | 480 | 3 | 85 | 2019-11-22 | 0.78 | 1 | 240 | 0 | 0.70 | 10080.0 | POINT (121.45159 31.21073) |
| 1 | 322045 | 【sidihome】『Impression·Flower』ART studio downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242400 | 121.444490 | Entire home/apt | 464 | 1 | 42 | 2017-11-13 | 0.51 | 16 | 242 | 0 | 0.70 | 9744.0 | POINT (121.44449 31.24240) |
| 2 | 402315 | Sidihome·Moon Treasures SIMPLE APT IN DOWNTOWN | 681552 | Leon | 静安区 / Jing'an District | 31.242870 | 121.443550 | Entire home/apt | 445 | 1 | 27 | 2012-07-07 | 0.24 | 16 | 333 | 7 | 0.48 | 6408.0 | POINT (121.44355 31.24287) |
| 3 | 479499 | 【sidihome】Plum flower Luxury movie apt in JING'AN | 681552 | Leon | 静安区 / Jing'an District | 31.243660 | 121.443960 | Entire home/apt | 464 | 1 | 28 | 2013-09-25 | 0.25 | 16 | 360 | 0 | 0.50 | 6960.0 | POINT (121.44396 31.24366) |
| 4 | 479506 | 【Sidihome】ENGLAND type cozy studio in downtown | 681552 | Leon | 静安区 / Jing'an District | 31.242110 | 121.442590 | Entire home/apt | 407 | 1 | 34 | 2016-12-29 | 0.33 | 16 | 41 | 0 | 0.66 | 8058.6 | POINT (121.44259 31.24211) |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 27592 | 52417690 | 【乌托邦】白日梦 15号线祁安路地铁站、临近上海大学、静安大融城 | 163526627 | 戴月晔 | 普陀区 / Putuo District | 31.295555 | 121.388359 | Private room | 209 | 1 | 1 | 2021-09-26 | 1.00 | 3 | 363 | 1 | 0.70 | 4389.0 | POINT (121.38836 31.29556) |
| 27617 | 52431527 | 【榭舍】100寸巨幕投影仪|近中山公园、东华大学|延安西路2/4号线地铁站步行300M|可做饭 | 188252534 | King | 长宁区 / Changning District | 31.213061 | 121.417505 | Entire home/apt | 1028 | 1 | 3 | 2021-09-26 | 3.00 | 7 | 182 | 3 | 0.70 | 21588.0 | POINT (121.41751 31.21306) |
| 27668 | 52451273 | 【外滩轻奢·PLUS】全江景落地窗观景/外滩全景/听外滩钟声/俯瞰百年外滩「不接吵闹型活动聚会」 | 269261326 | 江先森 | 黄浦区 / Huangpu District | 31.220263 | 121.503426 | Entire home/apt | 2349 | 1 | 3 | 2021-09-28 | 3.00 | 4 | 363 | 3 | 0.70 | 49329.0 | POINT (121.50343 31.22026) |
| 27682 | 52461214 | 租界风情/徐家汇公园景观/胸科医院/宛平路梧桐大道/遍布咖啡馆和西餐厅 | 424640055 | 律 | 徐汇区 / Xuhui District | 31.198627 | 121.445412 | Entire home/apt | 489 | 1 | 1 | 2021-09-27 | 1.00 | 1 | 50 | 1 | 0.70 | 10269.0 | POINT (121.44541 31.19863) |
| 27706 | 52465589 | 中山公园龙之梦轻奢大三房,全新装修,地铁2/4号线,到中山公园,新天地、人民广场、静安寺、南京东路 | 68952375 | 剑峰 | 长宁区 / Changning District | 31.215880 | 121.419420 | Entire home/apt | 1485 | 2 | 1 | 2021-09-29 | 1.00 | 1 | 259 | 1 | 0.70 | 31185.0 | POINT (121.41942 31.21588) |
16910 rows × 19 columns
# Spatially join every listing point to the district it falls in.
# Observation: the left-hand frame decides which geometry is kept. With
# `data_geo` first the listing points are retained; with `districts` first the
# district polygons would be retained, so later visualizations would differ.
# The join is done in case point geometries are needed later. It is not
# essential here, because the actual districts are already recorded as
# "neighbourhoods" in the Airbnb data.
data_geo_district = geopandas.sjoin(left_df=data_geo, right_df=districts, how='inner')
data_geo_district
| id | name | host_id | host_name | neighbourhood_left | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | number_of_reviews_ltm | occupancy_rate | monthly_rental_income | geometry | index_right | neighbourhood_right | area | population | pop_density | listings_per_district | listings_per_sqkm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 24963 | Heart of French Built Music Conservatory / Home | 98203 | Jia | 徐汇区 / Xuhui District | 31.210730 | 121.45159 | Entire home/apt | 480 | 3 | 85 | 2019-11-22 | 0.78 | 1 | 240 | 0 | 0.70 | 10080.0 | POINT (121.45159 31.21073) | 9 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| 16 | 920228 | Xujiahui Gardern view room | 4944168 | Creed | 徐汇区 / Xuhui District | 31.196700 | 121.44157 | Private room | 303 | 3 | 9 | 2021-07-28 | 0.41 | 3 | 363 | 1 | 0.70 | 6363.0 | POINT (121.44157 31.19670) | 9 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| 17 | 920440 | Study Room with a single bed 徐家汇中心城区书房带二种体验选择 | 4944168 | Creed | 徐汇区 / Xuhui District | 31.192320 | 121.44548 | Private room | 168 | 3 | 22 | 2018-05-07 | 0.35 | 3 | 290 | 3 | 0.70 | 3528.0 | POINT (121.44548 31.19232) | 9 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| 24 | 1555953 | The best known luxery apartment | 8283128 | Yuan | 徐汇区 / Xuhui District | 31.205770 | 121.44745 | Private room | 376 | 2 | 13 | 2018-06-29 | 3.71 | 1 | 87 | 4 | 0.70 | 7896.0 | POINT (121.44745 31.20577) | 9 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| 34 | 2805178 | Penthouse central park-view 150sqm | 14349658 | Alfred | 徐汇区 / Xuhui District | 31.195060 | 121.43928 | Entire home/apt | 1500 | 1 | 5 | 2016-11-01 | 0.08 | 1 | 358 | 0 | 0.16 | 7200.0 | POINT (121.43928 31.19506) | 9 | 徐汇区 / Xuhui District | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 18333 | 46018560 | 金山城市沙滩,阳台浴缸,ins风,智能家居,投影家庭影院ps4。 | 201500524 | 海斌 | 金山区 / Jinshan District | 30.756820 | 121.33515 | Entire home/apt | 950 | 1 | 38 | 2021-05-29 | 3.37 | 1 | 321 | 38 | 0.70 | 19950.0 | POINT (121.33515 30.75682) | 14 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
| 19374 | 46950675 | 榻榻米家庭套房 上海金山渔村 月夕 | 218991643 | 舒 | 金山区 / Jinshan District | 30.731350 | 121.37612 | Private room | 694 | 1 | 2 | 2021-08-13 | 0.72 | 9 | 298 | 2 | 0.70 | 14574.0 | POINT (121.37612 30.73135) | 14 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
| 20830 | 48237980 | 【整栋出租】古镇小资文艺独享代院小屋 | 17524407 | Veronica | 金山区 / Jinshan District | 30.887780 | 121.01527 | Entire home/apt | 954 | 1 | 4 | 2021-08-15 | 0.64 | 3 | 356 | 4 | 0.70 | 20034.0 | POINT (121.01527 30.88778) | 14 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
| 22836 | 49656736 | 城市沙滩鱼村万达loft复试情侣亲子房投影智能锁 | 258659865 | 小妹 | 金山区 / Jinshan District | 30.749838 | 121.34449 | Entire home/apt | 601 | 1 | 2 | 2021-09-04 | 1.76 | 4 | 360 | 2 | 0.70 | 12621.0 | POINT (121.34449 30.74984) | 14 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
| 25962 | 51497329 | 近尚跃骏马园马术俱乐部/车镜公园.聚会/轰趴/团建/棋牌/烧烤/KTV/旅游/度假别墅 | 236014065 | 梦颖 | 金山区 / Jinshan District | 30.834020 | 121.24115 | Entire home/apt | 4380 | 1 | 1 | 2021-08-24 | 0.79 | 2 | 175 | 1 | 0.70 | 91980.0 | POINT (121.24115 30.83402) | 14 | 金山区 / Jinshan District | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 |
16910 rows × 26 columns
Interesting... It's the outlying districts that have the highest average prices, and that is mostly because of the "Entire home/apt" listings. I guess there are some really luxurious apartments in those areas.
You'll also see in the map below that it looks almost the opposite of the map visualizing the density of listings.
# Price summary (mean, total, max, min) for every district / room-type pair,
# ordered from the highest mean price to the lowest.
data_price = (
    data.groupby(['neighbourhood', 'room_type'])['price']
    .agg(['mean', 'sum', 'max', 'min'])
    .sort_values(by='mean', ascending=False)
    .reset_index()
)
data_price
| neighbourhood | room_type | mean | sum | max | min | |
|---|---|---|---|---|---|---|
| 0 | 崇明区 / Chongming District | Entire home/apt | 2740.523404 | 1288046 | 15888 | 150 |
| 1 | 青浦区 / Qingpu District | Entire home/apt | 1864.467213 | 682395 | 19800 | 70 |
| 2 | 浦东新区 / Pudong | Entire home/apt | 1460.540876 | 4001882 | 27916 | 71 |
| 3 | 金山区 / Jinshan District | Entire home/apt | 1459.833333 | 52554 | 6800 | 166 |
| 4 | 松江区 / Songjiang District | Entire home/apt | 1334.841379 | 580656 | 17540 | 92 |
| 5 | 奉贤区 / Fengxian District | Entire home/apt | 1300.368000 | 162546 | 8851 | 63 |
| 6 | 崇明区 / Chongming District | Private room | 936.927419 | 348537 | 16000 | 91 |
| 7 | 宝山区 / Baoshan District | Entire home/apt | 890.100000 | 142416 | 18999 | 118 |
| 8 | 黄浦区 / Huangpu District | Entire home/apt | 873.786859 | 1635729 | 19999 | 112 |
| 9 | 普陀区 / Putuo District | Entire home/apt | 816.401515 | 107765 | 9999 | 116 |
| 10 | 金山区 / Jinshan District | Private room | 749.491525 | 44220 | 3880 | 214 |
| 11 | 静安区 / Jing'an District | Entire home/apt | 749.317912 | 631675 | 13800 | 132 |
| 12 | 闵行区 / Minhang District | Entire home/apt | 739.056689 | 325924 | 28000 | 128 |
| 13 | 徐汇区 / Xuhui District | Entire home/apt | 735.734979 | 893918 | 9999 | 101 |
| 14 | 虹口区 / Hongkou District | Entire home/apt | 671.485632 | 233677 | 10000 | 134 |
| 15 | 长宁区 / Changning District | Entire home/apt | 638.685185 | 206934 | 8888 | 124 |
| 16 | 青浦区 / Qingpu District | Private room | 622.129771 | 162998 | 12189 | 57 |
| 17 | 嘉定区 / Jiading District | Entire home/apt | 598.790036 | 168260 | 8000 | 90 |
| 18 | 杨浦区 / Yangpu District | Entire home/apt | 549.000000 | 137799 | 5238 | 156 |
| 19 | 浦东新区 / Pudong | Private room | 540.847705 | 2038455 | 13286 | 54 |
| 20 | 黄浦区 / Huangpu District | Private room | 532.955390 | 143365 | 3909 | 85 |
| 21 | 奉贤区 / Fengxian District | Private room | 476.440000 | 11911 | 3309 | 65 |
| 22 | 静安区 / Jing'an District | Private room | 381.924242 | 100828 | 2000 | 80 |
| 23 | 徐汇区 / Xuhui District | Private room | 377.350943 | 99998 | 5484 | 46 |
| 24 | 松江区 / Songjiang District | Private room | 372.152941 | 63266 | 5967 | 70 |
| 25 | 宝山区 / Baoshan District | Private room | 359.739726 | 52522 | 9428 | 60 |
| 26 | 闵行区 / Minhang District | Private room | 342.644531 | 87717 | 10000 | 78 |
| 27 | 虹口区 / Hongkou District | Private room | 328.494949 | 32521 | 3100 | 90 |
| 28 | 普陀区 / Putuo District | Private room | 309.141414 | 30605 | 2000 | 65 |
| 29 | 长宁区 / Changning District | Private room | 299.976048 | 50096 | 3000 | 117 |
| 30 | 嘉定区 / Jiading District | Private room | 280.420000 | 28042 | 3229 | 65 |
| 31 | 黄浦区 / Huangpu District | Shared room | 278.418803 | 32575 | 6000 | 69 |
| 32 | 浦东新区 / Pudong | Shared room | 246.370370 | 13304 | 2208 | 59 |
| 33 | 杨浦区 / Yangpu District | Private room | 246.253731 | 32998 | 580 | 98 |
| 34 | 崇明区 / Chongming District | Shared room | 233.000000 | 233 | 233 | 233 |
| 35 | 长宁区 / Changning District | Shared room | 199.888889 | 5397 | 1888 | 65 |
| 36 | 青浦区 / Qingpu District | Shared room | 187.600000 | 1876 | 688 | 64 |
| 37 | 静安区 / Jing'an District | Shared room | 160.661017 | 9479 | 1000 | 66 |
| 38 | 虹口区 / Hongkou District | Shared room | 160.208333 | 3845 | 600 | 68 |
| 39 | 徐汇区 / Xuhui District | Shared room | 152.830189 | 8100 | 365 | 60 |
| 40 | 松江区 / Songjiang District | Shared room | 143.076923 | 1860 | 398 | 54 |
| 41 | 普陀区 / Putuo District | Shared room | 132.000000 | 1716 | 230 | 70 |
| 42 | 闵行区 / Minhang District | Shared room | 126.400000 | 1264 | 331 | 59 |
| 43 | 宝山区 / Baoshan District | Shared room | 107.666667 | 969 | 299 | 43 |
| 44 | 嘉定区 / Jiading District | Shared room | 106.142857 | 743 | 190 | 67 |
| 45 | 杨浦区 / Yangpu District | Shared room | 95.444444 | 1718 | 282 | 47 |
# Bar chart of data_price's 'mean' column by room type, one facet per neighbourhood.
chart1 = alt.Chart(data_price)
price_bars = chart1.mark_bar().encode(
    x='room_type',
    y='mean',
    tooltip=['neighbourhood', 'room_type', 'mean'],
)
price_bars.facet(facet='neighbourhood')
# Mean listing price per neighbourhood, highest mean first.
data_price2 = (
    data.groupby('neighbourhood')['price']
    .agg('mean')
    .sort_values(ascending=False)
    .reset_index(name='mean_price')
)

# Classify the neighbourhood means into 5 Fisher-Jenks classes.
classification = mapclassify.FisherJenks(data_price2['mean_price'], k=5)
# Put the smallest mean in front of the class bounds; the combined list is
# handed to the choropleth as its bin edges.
bin_list = [data_price2['mean_price'].min(), *classification.bins.tolist()]

map_price = folium.Map(
    location=[31.22, 121.46],
    zoom_start=9,
    min_zoom=9,
    max_zoom=13,
    tiles='cartodb positron',
    width=700,
    height=500,
)
price_layer_options = {
    'geo_data': districts,
    'data': data_price2,
    'columns': ['neighbourhood', 'mean_price'],
    'key_on': 'feature.properties.neighbourhood',
    'fill_color': 'YlOrRd',
    'fill_opacity': 0.8,
    'bins': bin_list,
    'legend_name': 'Mean price across neighbourhoods',
}
folium.Choropleth(**price_layer_options).add_to(map_price)
map_price
It actually looks quite similar to the price map. I guess the occupancy rates of those high-priced apartments/rooms are not bad, so higher-priced neighbourhoods also have higher monthly rental income.
# Sum and mean of monthly_rental_income per neighbourhood, highest mean first.
income_by_neighbourhood = (
    data.groupby('neighbourhood')['monthly_rental_income']
    .agg(['sum', 'mean'])
    .sort_values(by='mean', ascending=False)
    .reset_index()
)
# Attach the income columns to the district rows that share a neighbourhood name.
districts2 = districts.merge(income_by_neighbourhood, on='neighbourhood')
districts2
| neighbourhood | geometry | area | population | pop_density | listings_per_district | listings_per_sqkm | sum | mean | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 青浦区 / Qingpu District | MULTIPOLYGON (((120.99306 30.95248, 120.99151 ... | 670.14 | 1219000.0 | 1819.022891 | 638 | 0.952040 | 12849037.8 | 20139.557680 |
| 1 | 黄浦区 / Huangpu District | MULTIPOLYGON (((121.49028 31.24411, 121.48963 ... | 20.46 | 653800.0 | 31955.034213 | 2258 | 110.361681 | 33191238.6 | 14699.397077 |
| 2 | 浦东新区 / Pudong | MULTIPOLYGON (((121.96165 31.20804, 121.96452 ... | 1210.41 | 5550200.0 | 4585.388422 | 6563 | 5.422130 | 99564813.6 | 15170.625263 |
| 3 | 杨浦区 / Yangpu District | MULTIPOLYGON (((121.51592 31.34568, 121.51700 ... | 60.73 | 1312700.0 | 21615.346616 | 403 | 6.635930 | 2829858.0 | 7021.980149 |
| 4 | 虹口区 / Hongkou District | MULTIPOLYGON (((121.47506 31.25179, 121.47512 ... | 23.48 | 797000.0 | 33943.781942 | 471 | 20.059625 | 4965872.4 | 10543.253503 |
| 5 | 静安区 / Jing'an District | MULTIPOLYGON (((121.47506 31.25179, 121.47626 ... | 36.88 | 1062800.0 | 28817.787419 | 1166 | 31.616052 | 13491761.4 | 11570.978902 |
| 6 | 宝山区 / Baoshan District | MULTIPOLYGON (((121.51847 31.34456, 121.51700 ... | 270.99 | 2042300.0 | 7536.440459 | 315 | 1.162405 | 2807416.2 | 8912.432381 |
| 7 | 普陀区 / Putuo District | MULTIPOLYGON (((121.44540 31.24512, 121.44518 ... | 54.83 | 1281900.0 | 23379.536750 | 244 | 4.450119 | 2447065.8 | 10028.958197 |
| 8 | 长宁区 / Changning District | MULTIPOLYGON (((121.34234 31.24280, 121.34275 ... | 38.30 | 694000.0 | 18120.104439 | 518 | 13.524804 | 4651473.6 | 8979.678764 |
| 9 | 徐汇区 / Xuhui District | MULTIPOLYGON (((121.47122 31.19020, 121.46955 ... | 54.76 | 1084400.0 | 19802.775749 | 1533 | 27.994887 | 16983416.4 | 11078.549511 |
| 10 | 闵行区 / Minhang District | MULTIPOLYGON (((121.55801 31.07697, 121.55818 ... | 370.75 | 2543500.0 | 6860.418071 | 707 | 1.906945 | 6791313.0 | 9605.817539 |
| 11 | 嘉定区 / Jiading District | MULTIPOLYGON (((121.29654 31.49868, 121.29739 ... | 464.20 | 1588900.0 | 3422.878070 | 388 | 0.835847 | 3289513.2 | 8478.126804 |
| 12 | 崇明区 / Chongming District | MULTIPOLYGON (((121.78813 31.32750, 121.78835 ... | 1185.49 | 688100.0 | 580.435094 | 843 | 0.711098 | 23768466.6 | 28195.096797 |
| 13 | 奉贤区 / Fengxian District | MULTIPOLYGON (((121.35302 30.90651, 121.35294 ... | 687.39 | 1152000.0 | 1675.904508 | 150 | 0.218217 | 2588434.8 | 17256.232000 |
| 14 | 金山区 / Jinshan District | MULTIPOLYGON (((121.42155 30.68463, 121.42217 ... | 586.05 | 805000.0 | 1373.602935 | 95 | 0.162102 | 1102011.6 | 11600.122105 |
| 15 | 松江区 / Songjiang District | MULTIPOLYGON (((121.34752 30.91507, 121.34740 ... | 605.64 | 1762200.0 | 2909.649297 | 618 | 1.020408 | 9616754.4 | 15561.091262 |
# Static choropleth of the 'mean' column (6 Fisher-Jenks classes) drawn over
# an OpenStreetMap basemap.
plot_avg_income = districts2.plot(
    column='mean',
    legend=True,
    cmap='YlGnBu',
    scheme='FisherJenks',
    k=6,
    figsize=[9, 9],
    alpha=0.75,
    edgecolor="grey",
)
cx.add_basemap(
    plot_avg_income,
    crs=districts.crs.to_string(),
    source=cx.providers.OpenStreetMap.Mapnik,
)
# Scatter of occupancy_rate against price for listings priced at 1000 or more.
price1000 = data_geo.query('price >= 1000')
chart2 = alt.Chart(price1000)
chart2.mark_point().encode(
    x='occupancy_rate',
    y='price',
    tooltip=['occupancy_rate', 'price'],
)
# Share of each room type within every neighbourhood (normalized counts, so the
# 'percentage' column holds fractions of the neighbourhood's listings).
data_room_type = (
    data.groupby('neighbourhood')['room_type']
    .value_counts(normalize=True)
    .reset_index(name='percentage')
)
chart_room_type = alt.Chart(data_room_type)
room_type_bars = chart_room_type.mark_bar().encode(
    x='room_type',
    y='percentage',
    tooltip=['room_type', 'percentage'],
)
room_type_bars.facet(facet='neighbourhood')
# Number of distinct host ids among the listings (missing ids are not counted).
data['host_id'].nunique(dropna=True)
5128
# Per host: count, sum and mean of monthly_rental_income, keeping only hosts
# whose count is above 1 and ordering them by the mean, highest first.
# Here "mean" stands for "monthly rental income per listing"
(
    data.groupby('host_id')['monthly_rental_income']
    .agg(['count', 'sum', 'mean'])
    .query('count > 1')
    .sort_values(by='mean', ascending=False)
    .reset_index()
)
| host_id | count | sum | mean | |
|---|---|---|---|---|
| 0 | 138616376 | 3 | 629937.0 | 209979.0 |
| 1 | 349759183 | 2 | 398181.0 | 199090.5 |
| 2 | 124347083 | 2 | 390999.0 | 195499.5 |
| 3 | 118433872 | 3 | 579166.8 | 193055.6 |
| 4 | 362868813 | 7 | 1320417.0 | 188631.0 |
| ... | ... | ... | ... | ... |
| 2371 | 187824760 | 2 | 960.0 | 480.0 |
| 2372 | 290007684 | 2 | 891.0 | 445.5 |
| 2373 | 79502671 | 2 | 757.2 | 378.6 |
| 2374 | 212925871 | 2 | 662.4 | 331.2 |
| 2375 | 148353317 | 4 | 1065.6 | 266.4 |
2376 rows × 4 columns
# The 20 hosts with the largest summed monthly_rental_income, together with
# their count and per-listing mean.
income_stats_per_host = data.groupby('host_id')['monthly_rental_income'].agg(['count', 'sum', 'mean'])
host_monthly_income = (
    income_stats_per_host
    .sort_values(by='sum', ascending=False)
    .head(20)
    .reset_index()
)
host_monthly_income
| host_id | count | sum | mean | |
|---|---|---|---|---|
| 0 | 53641962 | 23 | 2236883.4 | 97255.800000 |
| 1 | 362868813 | 7 | 1320417.0 | 188631.000000 |
| 2 | 217662003 | 17 | 1242366.0 | 73080.352941 |
| 3 | 5493846 | 31 | 1202544.0 | 38791.741935 |
| 4 | 349613801 | 24 | 1029948.0 | 42914.500000 |
| 5 | 34581541 | 55 | 972314.4 | 17678.443636 |
| 6 | 34875021 | 59 | 871758.6 | 14775.569492 |
| 7 | 240286703 | 11 | 842893.2 | 76626.654545 |
| 8 | 307991017 | 15 | 835926.0 | 55728.400000 |
| 9 | 276770135 | 55 | 831214.2 | 15112.985455 |
| 10 | 240517873 | 11 | 809003.4 | 73545.763636 |
| 11 | 180263042 | 27 | 778155.0 | 28820.555556 |
| 12 | 324525852 | 33 | 741246.6 | 22462.018182 |
| 13 | 302269565 | 10 | 720623.4 | 72062.340000 |
| 14 | 165525225 | 22 | 715202.4 | 32509.200000 |
| 15 | 60858732 | 45 | 698118.0 | 15513.733333 |
| 16 | 402172155 | 24 | 678552.0 | 28273.000000 |
| 17 | 358285023 | 16 | 672987.0 | 42061.687500 |
| 18 | 151052644 | 7 | 668400.0 | 95485.714286 |
| 19 | 122290382 | 18 | 655447.8 | 36413.766667 |
Since I can't read much from this table, I try to "normalize" the data a little bit.
# Scale each host's mean monthly income down by 30
# (presumably a 30-day month, giving a per-day figure; confirm).
host_monthly_income['avg_price_per_listing'] = host_monthly_income['mean'].div(30)
host_monthly_income.sort_values(by='avg_price_per_listing', ascending=False)
| host_id | count | sum | mean | avg_price_per_listing | |
|---|---|---|---|---|---|
| 1 | 362868813 | 7 | 1320417.0 | 188631.000000 | 6287.700000 |
| 0 | 53641962 | 23 | 2236883.4 | 97255.800000 | 3241.860000 |
| 18 | 151052644 | 7 | 668400.0 | 95485.714286 | 3182.857143 |
| 7 | 240286703 | 11 | 842893.2 | 76626.654545 | 2554.221818 |
| 10 | 240517873 | 11 | 809003.4 | 73545.763636 | 2451.525455 |
| 2 | 217662003 | 17 | 1242366.0 | 73080.352941 | 2436.011765 |
| 13 | 302269565 | 10 | 720623.4 | 72062.340000 | 2402.078000 |
| 8 | 307991017 | 15 | 835926.0 | 55728.400000 | 1857.613333 |
| 4 | 349613801 | 24 | 1029948.0 | 42914.500000 | 1430.483333 |
| 17 | 358285023 | 16 | 672987.0 | 42061.687500 | 1402.056250 |
| 3 | 5493846 | 31 | 1202544.0 | 38791.741935 | 1293.058065 |
| 19 | 122290382 | 18 | 655447.8 | 36413.766667 | 1213.792222 |
| 14 | 165525225 | 22 | 715202.4 | 32509.200000 | 1083.640000 |
| 11 | 180263042 | 27 | 778155.0 | 28820.555556 | 960.685185 |
| 16 | 402172155 | 24 | 678552.0 | 28273.000000 | 942.433333 |
| 12 | 324525852 | 33 | 741246.6 | 22462.018182 | 748.733939 |
| 5 | 34581541 | 55 | 972314.4 | 17678.443636 | 589.281455 |
| 15 | 60858732 | 45 | 698118.0 | 15513.733333 | 517.124444 |
| 9 | 276770135 | 55 | 831214.2 | 15112.985455 | 503.766182 |
| 6 | 34875021 | 59 | 871758.6 | 14775.569492 | 492.518983 |
# Count of listing ids per host, repeated on every row of that host.
data['listings_per_host'] = data.groupby('host_id')['id'].transform('count')
# actually, there is a column called "calculated_host_listings_count", but just to be sure...

# Flag hosts with more than one listing as "professional". The flag is stored
# as the strings "True"/"False" because the query below compares against "True".
has_several_listings = data['listings_per_host'] > 1
has_single_listing = data['listings_per_host'] <= 1
data.loc[has_several_listings, 'prof_host'] = "True"
data.loc[has_single_listing, 'prof_host'] = "False"

# Fraction of each neighbourhood's listings that carry the "True" flag,
# largest fraction first.
prof_host_shares = (
    data.groupby('neighbourhood')['prof_host']
    .value_counts(normalize=True)
    .reset_index(name='owned_by_prof_host')
)
owned_by_prof_host = (
    prof_host_shares
    .query('prof_host == "True"')
    .sort_values(by='owned_by_prof_host', ascending=False)
)
owned_by_prof_host
| neighbourhood | prof_host | owned_by_prof_host | |
|---|---|---|---|
| 16 | 浦东新区 / Pudong | True | 0.901722 |
| 18 | 虹口区 / Hongkou District | True | 0.872611 |
| 30 | 黄浦区 / Huangpu District | True | 0.870682 |
| 8 | 徐汇区 / Xuhui District | True | 0.812785 |
| 28 | 静安区 / Jing'an District | True | 0.812178 |
| 20 | 金山区 / Jinshan District | True | 0.810526 |
| 14 | 松江区 / Songjiang District | True | 0.799353 |
| 12 | 杨浦区 / Yangpu District | True | 0.799007 |
| 0 | 嘉定区 / Jiading District | True | 0.783505 |
| 4 | 宝山区 / Baoshan District | True | 0.777778 |
| 24 | 闵行区 / Minhang District | True | 0.766620 |
| 2 | 奉贤区 / Fengxian District | True | 0.760000 |
| 26 | 青浦区 / Qingpu District | True | 0.750784 |
| 22 | 长宁区 / Changning District | True | 0.739382 |
| 6 | 崇明区 / Chongming District | True | 0.658363 |
| 10 | 普陀区 / Putuo District | True | 0.635246 |
# Same calculation with a stricter cut-off: a host now needs more than two
# listings to be flagged "True". This overwrites data['prof_host'].
data['listings_per_host'] = data.groupby('host_id')['id'].transform('count')
above_two_listings = data['listings_per_host'] > 2
at_most_two_listings = data['listings_per_host'] <= 2
data.loc[above_two_listings, 'prof_host'] = "True"
data.loc[at_most_two_listings, 'prof_host'] = "False"
(
    data.groupby('neighbourhood')['prof_host']
    .value_counts(normalize=True)
    .reset_index(name='owned_by_prof_host')
    .query('prof_host == "True"')
    .sort_values(by='owned_by_prof_host', ascending=False)
)
| neighbourhood | prof_host | owned_by_prof_host | |
|---|---|---|---|
| 16 | 浦东新区 / Pudong | True | 0.841536 |
| 18 | 虹口区 / Hongkou District | True | 0.821656 |
| 30 | 黄浦区 / Huangpu District | True | 0.780779 |
| 8 | 徐汇区 / Xuhui District | True | 0.742335 |
| 20 | 金山区 / Jinshan District | True | 0.736842 |
| 28 | 静安区 / Jing'an District | True | 0.722127 |
| 12 | 杨浦区 / Yangpu District | True | 0.684864 |
| 14 | 松江区 / Songjiang District | True | 0.679612 |
| 24 | 闵行区 / Minhang District | True | 0.654880 |
| 0 | 嘉定区 / Jiading District | True | 0.654639 |
| 4 | 宝山区 / Baoshan District | True | 0.653968 |
| 22 | 长宁区 / Changning District | True | 0.631274 |
| 26 | 青浦区 / Qingpu District | True | 0.600313 |
| 2 | 奉贤区 / Fengxian District | True | 0.566667 |
| 7 | 崇明区 / Chongming District | True | 0.487544 |
| 11 | 普陀区 / Putuo District | True | 0.483607 |
# Choropleth of the owned_by_prof_host share per district, with a circle
# marking the author's home area.
# The map object is named map_prof_host rather than `map`: binding `map` would
# hide Python's builtin map() for the rest of the session, and the other maps
# in this notebook already use the map_<topic> naming.
map_prof_host = folium.Map(
    location=[31.22, 121.46],
    zoom_start=9,
    min_zoom=9,
    max_zoom=13,
    tiles='cartodb positron',
    width=700,
    height=500,
)
folium.Choropleth(
    geo_data=districts,
    data=owned_by_prof_host,
    columns=['neighbourhood', 'owned_by_prof_host'],
    key_on='feature.properties.neighbourhood',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    legend_name='Listings owned by professional hosts in each District',
).add_to(map_prof_host)
# Unfilled crimson ring (radius 6200) around the "My base camp" location.
folium.Circle(
    radius=6200,
    location=[31.26650, 121.38530],
    popup="My base camp",
    color="crimson",
    fill=False,
).add_to(map_prof_host)
map_prof_host
Why am I making this map? To show the district where I've been living for most of my life. This is also where many working-class people have lived for decades, especially from the 1980s to the 2000s. It is the district where the smallest share of listings is owned by professional hosts. (And the professional hosts might not live here; they may just own properties here.) ~ Hence hooray we modest proletarians. ~
A big difference! Six times!
# First total monthly_rental_income per host, then average those host totals
# within each prof_host group.
income_total_per_host = data.groupby(['prof_host', 'host_id'])['monthly_rental_income'].sum()
income_total_per_host.groupby('prof_host').mean().reset_index()
| prof_host | monthly_rental_income | |
|---|---|---|
| 0 | False | 18320.442196 |
| 1 | True | 108499.104663 |
# Histogram of occupancy_rate over all listings, 50 bins.
data.hist(
    column='occupancy_rate',
    bins=50,
)
array([[<AxesSubplot:title={'center':'occupancy_rate'}>]], dtype=object)
The total number of listings (after the data is cleaned) is 16910. So it looks like around 2/3 of the listings hit the maximum occupancy rate.
# Number of listings per neighbourhood whose occupancy_rate equals 0.7.
listings_max_occ = (
    data.query('occupancy_rate == 0.7')
    .groupby('neighbourhood')['id']
    .agg('count')
    .reset_index(name='max_occ_listings')
)
# Look up each district's area by neighbourhood name. Dividing by
# districts['area'] directly would align the two frames on their positional
# row index, and they list the districts in different orders, so every count
# would be divided by some other district's area.
area_by_neighbourhood = districts.set_index('neighbourhood')['area']
listings_max_occ['max_occ_listings_per_sqkm'] = (
    listings_max_occ['max_occ_listings']
    / listings_max_occ['neighbourhood'].map(area_by_neighbourhood)
)
listings_max_occ
# I calculate 'max_occ_listings_per_sqkm' because I want to see whether Pudong has the highest number of
# listings at the maximum occupancy rate merely because of its large area.
# NOTE(review): densities computed before this fix paired counts with the wrong areas; re-check any
# conclusion drawn from them against the re-computed column.
| neighbourhood | max_occ_listings | max_occ_listings_per_sqkm | |
|---|---|---|---|
| 0 | 嘉定区 / Jiading District | 241 | 0.359626 |
| 1 | 奉贤区 / Fengxian District | 64 | 3.128055 |
| 2 | 宝山区 / Baoshan District | 189 | 0.156145 |
| 3 | 崇明区 / Chongming District | 350 | 5.763214 |
| 4 | 徐汇区 / Xuhui District | 1060 | 45.144804 |
| 5 | 普陀区 / Putuo District | 173 | 4.690889 |
| 6 | 杨浦区 / Yangpu District | 250 | 0.922543 |
| 7 | 松江区 / Songjiang District | 357 | 6.511034 |
| 8 | 浦东新区 / Pudong | 3988 | 104.125326 |
| 9 | 虹口区 / Hongkou District | 339 | 6.190650 |
| 10 | 金山区 / Jinshan District | 24 | 0.064734 |
| 11 | 长宁区 / Changning District | 371 | 0.799224 |
| 12 | 闵行区 / Minhang District | 428 | 0.361032 |
| 13 | 青浦区 / Qingpu District | 302 | 0.439343 |
| 14 | 静安区 / Jing'an District | 869 | 1.482809 |
| 15 | 黄浦区 / Huangpu District | 1705 | 2.815204 |
# Choropleth of max_occ_listings per district; districts are highlighted on hover.
map_listings_max_occ = folium.Map(
    location=[31.22, 121.46],
    zoom_start=9,
    min_zoom=9,
    max_zoom=13,
    tiles='cartodb positron',
    width=700,
    height=500,
)
max_occ_layer_options = {
    'geo_data': districts,
    'data': listings_max_occ,
    'columns': ['neighbourhood', 'max_occ_listings'],
    'key_on': 'feature.properties.neighbourhood',
    'fill_color': 'YlOrRd',
    'fill_opacity': 0.8,
    'highlight': True,
    'legend_name': 'Number of listings that hit the maximum occupancy rate',
}
folium.Choropleth(**max_occ_layer_options).add_to(map_listings_max_occ)
map_listings_max_occ
# Plot the listings whose occupancy_rate equals 0.7 on top of an
# OpenStreetMap basemap.
max_occ_points = data_geo.query('occupancy_rate == 0.7')
plot_max_occ = max_occ_points.plot(figsize=[50, 30])
cx.add_basemap(
    plot_max_occ,
    crs=data_geo.crs.to_string(),
    source=cx.providers.OpenStreetMap.Mapnik,
)
plot_max_occ
<AxesSubplot:>
# Histogram of occupancy_rate for the listings below 0.7, 50 bins.
below_max_occupancy = data.query('occupancy_rate < 0.7')
below_max_occupancy.hist(column='occupancy_rate', bins=50)
array([[<AxesSubplot:title={'center':'occupancy_rate'}>]], dtype=object)
For those listings whose occupancy rates are lower than 70%, I don't have much insight. I visualize them just to have a sense of completion.